import onnxruntime as ort
import numpy as np
import time

# Load the quantized model into an ONNX Runtime inference session.
# NOTE: the path is absolute and machine-specific; adjust it when running
# this script on another machine.
onnx_model_path = "/home/lurker/文档/playground/python/pytorch/quantized_mnist_model.onnx"
session = ort.InferenceSession(onnx_model_path)

# Prepare the input: a single random float32 array of shape (1, 1, 28, 28).
dummy_input = np.random.randn(1, 1, 28, 28).astype(np.float32)

# Look up the names of the model's first input and first output; they are
# needed to build the feed dict and to select the requested output.
input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name

# Time a single inference call. perf_counter() is used instead of time.time()
# because it is a monotonic, high-resolution clock meant for measuring short
# intervals; time.time() can jump if the system clock is adjusted.
# NOTE(review): this is the first call on the session, so any one-time
# first-run overhead is included in the measurement.
start_time = time.perf_counter()
session.run([output_name], {input_name: dummy_input})
end_time = time.perf_counter()

# Report the elapsed time in seconds with microsecond precision.
print(f"量化模型推理时间：{end_time - start_time:.6f} 秒")
